.text


.globl	_aesni_cbc_sha256_enc

.p2align	4
# Entry point: selects an implementation from the capability words stored at
# _OPENSSL_ia32cap_P and jumps to it (the chosen variant returns to the caller).
#   %rdi == 0 : probe call, returns 1 in %eax without reading the table.
#   otherwise : jumps to the _shaext, _xop, _avx2 or _avx variant, tested in
#               that order; executes ud2 when none of the tests succeeds.
# NOTE(review): the bit meanings are inferred from the branch targets.
_aesni_cbc_sha256_enc:
	movl	$1,%eax				# answer for a probe call
	leaq	_OPENSSL_ia32cap_P(%rip),%r11
	testq	%rdi,%rdi
	jz	L$probe
	movq	4(%r11),%r10			# capability words 1 (low) and 2 (high)
	movl	0(%r11),%eax			# capability word 0
	btq	$61,%r10			# bit 29 of word 2
	jc	aesni_cbc_sha256_enc_shaext
	movq	%r10,%r11
	shrq	$32,%r11			# %r11d = word 2

	testl	$0x800,%r10d			# word 1, bit 11
	jnz	aesni_cbc_sha256_enc_xop
	andl	$0x128,%r11d			# word 2, bits 3, 5 and 8 ...
	cmpl	$0x128,%r11d			# ... all three must be set
	je	aesni_cbc_sha256_enc_avx2
	andl	$0x10000000,%r10d		# word 1, bit 28
	jnz	aesni_cbc_sha256_enc_avx
	ud2					# no usable variant: trap
	# The four instructions below follow an unconditional ud2 and are
	# therefore unreachable.
	xorl	%eax,%eax
	testq	%rdi,%rdi
	jz	L$probe
	ud2
L$probe:
	.byte	0xf3,0xc3			# rep ret


.p2align	6

# K256: the 64 SHA-256 round constants, four per row, with every row emitted
# twice (16 distinct rows x 32 bytes = 512 bytes).  The 128-bit code visible
# in this part of the file reads 16 bytes at a 32-byte stride (0/32/64/96
# from a row-group base), i.e. only the first copy of each row.
# NOTE(review): the duplicated rows are presumably for a 256-bit wide variant.
K256:
.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

# K256+512: vpshufb control that reverses the byte order inside every dword
# (applied to the message words right after they are loaded).  Stored twice.
# The most significant byte of its first dword is zero; the round loops test
# that byte (cmpb $0,131(%rbp)) to detect the end of the constant table.
.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
# K256+544: 80 bytes of select masks (32 zero bytes, 16 bytes of all-ones,
# 32 zero bytes).  The code loads three consecutive 16-byte windows starting
# at byte offset 8*(rounds field - 9); exactly one window can be all-ones,
# and it picks which of three vaesenclast results becomes the ciphertext.
.long	0,0,0,0,   0,0,0,0,   -1,-1,-1,-1
.long	0,0,0,0,   0,0,0,0
# ASCII: "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL terminated.
.byte	65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6

.p2align	6
# aesni_cbc_sha256_enc_xop: XOP variant, entry and stack frame setup.
# Register arguments as used by the code that follows:
#   %rdi  pointer to the data that is AES-CBC encrypted
#   %rsi  pointer to the ciphertext output
#   %rdx  number of 64-byte blocks (shifted left by 6 to get a byte count)
#   %rcx  AES key schedule (dword at offset 240 is read as the rounds field)
#   %r8   pointer to the 16-byte IV (read at start, rewritten at the end)
#   %r9   pointer to eight 32-bit hash state words (read and updated)
#   8(%rsp) on entry: pointer to the data that is hashed
# Frame (64-byte aligned, 128 bytes):
#   0..63(%rsp)   sixteen W+K words for the rounds in flight
#   64+0          current AES input pointer
#   64+8          output pointer minus input pointer
#   64+16         end of input (input + 64*blocks)
#   64+32         IV pointer
#   64+40         hash state pointer
#   64+48         hashed-data pointer minus input pointer
#   64+56         %rsp as it was after the six pushes (used by the epilogue)
aesni_cbc_sha256_enc_xop:
L$xop_shortcut:
	movq	8(%rsp),%r10			# stack-passed argument, fetched before the pushes move %rsp
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%r11			# remember %rsp for the epilogue
	subq	$128,%rsp
	andq	$-64,%rsp			# 64-byte align the frame (vmovdqa stores below need 16)

	shlq	$6,%rdx				# blocks -> bytes
	subq	%rdi,%rsi			# output kept as an offset from the input pointer
	subq	%rdi,%r10			# hashed data kept as an offset from the input pointer
	addq	%rdi,%rdx			# %rdx = end-of-input pointer


	movq	%rsi,64+8(%rsp)
	movq	%rdx,64+16(%rsp)

	movq	%r8,64+32(%rsp)
	movq	%r9,64+40(%rsp)
	movq	%r10,64+48(%rsp)
	movq	%r11,64+56(%rsp)
# L$prologue_xop: one-time register setup before the block loop.
# On exit: %r12 = AES input pointer, %rdi = key schedule + 128 (round keys
# are then addressed as N-128(%rdi)), %rsi = hashed-data offset, %xmm8 = IV,
# %eax,%ebx,%ecx,%edx,%r8d..%r11d = the eight hash state words,
# %xmm12/%xmm13/%xmm14 = select masks, %xmm10 = round key 0.
L$prologue_xop:
	vzeroall

	movq	%rdi,%r12
	leaq	128(%rcx),%rdi
	leaq	K256+544(%rip),%r13		# base of the select masks
	movl	240-128(%rdi),%r14d		# rounds field of the key schedule
	movq	%r9,%r15
	movq	%r10,%rsi
	vmovdqu	(%r8),%xmm8
	# Mask index = rounds field - 9, scaled by 8 below.  vmovdqa requires a
	# 16-byte aligned address, so the index has to be even.
	# NOTE(review): presumably the field is 9/11/13 for the three key sizes - confirm.
	subq	$9,%r14

	movl	0(%r15),%eax
	movl	4(%r15),%ebx
	movl	8(%r15),%ecx
	movl	12(%r15),%edx
	movl	16(%r15),%r8d
	movl	20(%r15),%r9d
	movl	24(%r15),%r10d
	movl	28(%r15),%r11d

	vmovdqa	0(%r13,%r14,8),%xmm14		# mask applied to the third vaesenclast result
	vmovdqa	16(%r13,%r14,8),%xmm13		# mask applied to the second vaesenclast result
	vmovdqa	32(%r13,%r14,8),%xmm12		# mask applied to the first vaesenclast result
	vmovdqu	0-128(%rdi),%xmm10		# round key 0
	jmp	L$loop_xop
.p2align	4
# L$loop_xop: start of one 64-byte block.  Loads the 64 bytes to be hashed
# from (%rsi + %r12), byte-swaps every dword, adds the first sixteen round
# constants and parks the sums in 0..63(%rsp).  Also primes the scalar
# temporaries used by the round code: %r14d = a, %esi = b ^ c, %r13d = e.
L$loop_xop:
	vmovdqa	K256+512(%rip),%xmm7		# byte-swap shuffle mask
	vmovdqu	0(%rsi,%r12,1),%xmm0
	vmovdqu	16(%rsi,%r12,1),%xmm1
	vmovdqu	32(%rsi,%r12,1),%xmm2
	vmovdqu	48(%rsi,%r12,1),%xmm3
	vpshufb	%xmm7,%xmm0,%xmm0
	leaq	K256(%rip),%rbp			# %rbp walks the constant table
	vpshufb	%xmm7,%xmm1,%xmm1
	vpshufb	%xmm7,%xmm2,%xmm2
	vpaddd	0(%rbp),%xmm0,%xmm4
	vpshufb	%xmm7,%xmm3,%xmm3
	vpaddd	32(%rbp),%xmm1,%xmm5
	vpaddd	64(%rbp),%xmm2,%xmm6
	vpaddd	96(%rbp),%xmm3,%xmm7
	vmovdqa	%xmm4,0(%rsp)
	movl	%eax,%r14d
	vmovdqa	%xmm5,16(%rsp)
	movl	%ebx,%esi
	vmovdqa	%xmm6,32(%rsp)
	xorl	%ecx,%esi
	vmovdqa	%xmm7,48(%rsp)
	movl	%r8d,%r13d
	jmp	L$xop_00_47

.p2align	4
# L$xop_00_47: one pass performs sixteen SHA-256 rounds, computes the message
# schedule for the following sixteen rounds and encrypts one 16-byte AES-CBC
# block, with the three instruction streams interleaved.
# Scalar stream: %r13d collects the rotations of e (ror 14, 5, 6 with xors in
#   between give rotations by 25, 11 and 6), %r14d those of a (ror 9, 11, 2
#   give 22, 13 and 2), %r12d = ((f ^ g) & e) ^ g; %esi and %r15d alternate
#   as the (a ^ b) & (b ^ c) helper.  W+K words come from 0..60(%rsp).
# Vector stream: %xmm0-%xmm3 hold the sixteen schedule words.  Each group of
#   four rounds rewrites one of them and stores the new W+K words.  The
#   .byte sequences are hand-encoded XOP vprotd (rotate dwords) instructions.
# AES stream: %xmm9 = block in flight, %xmm10 = current round key,
#   %xmm8 = previous ciphertext block (CBC chaining value), %xmm11 = candidate
#   final-round results, selected with the masks in %xmm12/%xmm13/%xmm14.
# The pass repeats until the byte at 131(%rbp) is zero, which happens once
# %rbp+128 has reached the shuffle mask stored after the round constants,
# i.e. after three passes per 64-byte block.
L$xop_00_47:
	subq	$-32*4,%rbp			# advance to the next sixteen constants
	vmovdqu	(%r12),%xmm9			# AES: load the next input block
	movq	%r12,64+0(%rsp)			# %r12 is reused as a scalar temporary below
	# Rounds +0..+3: schedule update of %xmm0, W+K words at 0..12(%rsp).
	vpalignr	$4,%xmm0,%xmm1,%xmm4
	rorl	$14,%r13d
	movl	%r14d,%eax
	vpalignr	$4,%xmm2,%xmm3,%xmm7
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
.byte	143,232,120,194,236,14	# vprotd $14,%xmm4,%xmm5
	rorl	$9,%r14d
	xorl	%r10d,%r12d
	vpsrld	$3,%xmm4,%xmm4
	rorl	$5,%r13d
	xorl	%eax,%r14d
	vpaddd	%xmm7,%xmm0,%xmm0
	andl	%r8d,%r12d
	vpxor	%xmm10,%xmm9,%xmm9		# AES: input ^ round key 0
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	0(%rsp),%r11d
	movl	%eax,%r15d
.byte	143,232,120,194,245,11	# vprotd $11,%xmm5,%xmm6
	rorl	$11,%r14d
	xorl	%r10d,%r12d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%ebx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
.byte	143,232,120,194,251,13	# vprotd $13,%xmm3,%xmm7
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%ebx,%esi
	addl	%r11d,%edx
	vpsrld	$10,%xmm3,%xmm6
	rorl	$2,%r14d
	addl	%esi,%r11d
	vpaddd	%xmm4,%xmm0,%xmm0
	movl	%edx,%r13d
	addl	%r11d,%r14d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$14,%r13d
	movl	%r14d,%r11d
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	rorl	$9,%r14d
	xorl	%r9d,%r12d
	vpxor	%xmm5,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vpxor	%xmm8,%xmm9,%xmm9		# AES: CBC chaining, xor with previous ciphertext / IV
	xorl	%edx,%r13d
	vpsrldq	$8,%xmm7,%xmm7
	addl	4(%rsp),%r10d
	movl	%r11d,%esi
	rorl	$11,%r14d
	xorl	%r9d,%r12d
	vpaddd	%xmm7,%xmm0,%xmm0
	xorl	%eax,%esi
	rorl	$6,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
.byte	143,232,120,194,248,13	# vprotd $13,%xmm0,%xmm7
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	vpsrld	$10,%xmm0,%xmm6
	xorl	%eax,%r15d
	addl	%r10d,%ecx
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$2,%r14d
	addl	%r15d,%r10d
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r10d
	vpxor	%xmm5,%xmm7,%xmm7
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	rorl	$9,%r14d
	xorl	%r8d,%r12d
	vpslldq	$8,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	vpaddd	%xmm7,%xmm0,%xmm0
	addl	8(%rsp),%r9d
	movl	%r10d,%r15d
	rorl	$11,%r14d
	xorl	%r8d,%r12d
	vpaddd	0(%rbp),%xmm0,%xmm6
	xorl	%r11d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	rorl	$2,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	rorl	$9,%r14d
	xorl	%edx,%r12d
	rorl	$5,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	12(%rsp),%r8d
	movl	%r9d,%esi
	rorl	$11,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	rorl	$6,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	rorl	$2,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	vmovdqa	%xmm6,0(%rsp)			# W+K for the next pass, words 0..3
	# Rounds +4..+7: schedule update of %xmm1, W+K words at 16..28(%rsp).
	vpalignr	$4,%xmm1,%xmm2,%xmm4
	rorl	$14,%r13d
	movl	%r14d,%r8d
	vpalignr	$4,%xmm3,%xmm0,%xmm7
	movl	%ebx,%r12d
	xorl	%eax,%r13d
.byte	143,232,120,194,236,14	# vprotd $14,%xmm4,%xmm5
	rorl	$9,%r14d
	xorl	%ecx,%r12d
	vpsrld	$3,%xmm4,%xmm4
	rorl	$5,%r13d
	xorl	%r8d,%r14d
	vpaddd	%xmm7,%xmm1,%xmm1
	andl	%eax,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	16(%rsp),%edx
	movl	%r8d,%r15d
.byte	143,232,120,194,245,11	# vprotd $11,%xmm5,%xmm6
	rorl	$11,%r14d
	xorl	%ecx,%r12d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%r9d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
.byte	143,232,120,194,248,13	# vprotd $13,%xmm0,%xmm7
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%r9d,%esi
	addl	%edx,%r11d
	vpsrld	$10,%xmm0,%xmm6
	rorl	$2,%r14d
	addl	%esi,%edx
	vpaddd	%xmm4,%xmm1,%xmm1
	movl	%r11d,%r13d
	addl	%edx,%r14d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$14,%r13d
	movl	%r14d,%edx
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	rorl	$9,%r14d
	xorl	%ebx,%r12d
	vpxor	%xmm5,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	vpsrldq	$8,%xmm7,%xmm7
	addl	20(%rsp),%ecx
	movl	%edx,%esi
	rorl	$11,%r14d
	xorl	%ebx,%r12d
	vpaddd	%xmm7,%xmm1,%xmm1
	xorl	%r8d,%esi
	rorl	$6,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
.byte	143,232,120,194,249,13	# vprotd $13,%xmm1,%xmm7
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	vpsrld	$10,%xmm1,%xmm6
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$2,%r14d
	addl	%r15d,%ecx
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ecx
	vpxor	%xmm5,%xmm7,%xmm7
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	rorl	$9,%r14d
	xorl	%eax,%r12d
	vpslldq	$8,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	vpaddd	%xmm7,%xmm1,%xmm1
	addl	24(%rsp),%ebx
	movl	%ecx,%r15d
	rorl	$11,%r14d
	xorl	%eax,%r12d
	vpaddd	32(%rbp),%xmm1,%xmm6
	xorl	%edx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	rorl	$2,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	rorl	$9,%r14d
	xorl	%r11d,%r12d
	rorl	$5,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	28(%rsp),%eax
	movl	%ebx,%esi
	rorl	$11,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	rorl	$6,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	rorl	$2,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	vmovdqa	%xmm6,16(%rsp)			# W+K for the next pass, words 4..7
	# Rounds +8..+11: schedule update of %xmm2, W+K words at 32..44(%rsp).
	vpalignr	$4,%xmm2,%xmm3,%xmm4
	rorl	$14,%r13d
	movl	%r14d,%eax
	vpalignr	$4,%xmm0,%xmm1,%xmm7
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
.byte	143,232,120,194,236,14	# vprotd $14,%xmm4,%xmm5
	rorl	$9,%r14d
	xorl	%r10d,%r12d
	vpsrld	$3,%xmm4,%xmm4
	rorl	$5,%r13d
	xorl	%eax,%r14d
	vpaddd	%xmm7,%xmm2,%xmm2
	andl	%r8d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	32(%rsp),%r11d
	movl	%eax,%r15d
.byte	143,232,120,194,245,11	# vprotd $11,%xmm5,%xmm6
	rorl	$11,%r14d
	xorl	%r10d,%r12d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%ebx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
.byte	143,232,120,194,249,13	# vprotd $13,%xmm1,%xmm7
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%ebx,%esi
	addl	%r11d,%edx
	vpsrld	$10,%xmm1,%xmm6
	rorl	$2,%r14d
	addl	%esi,%r11d
	vpaddd	%xmm4,%xmm2,%xmm2
	movl	%edx,%r13d
	addl	%r11d,%r14d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$14,%r13d
	movl	%r14d,%r11d
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	rorl	$9,%r14d
	xorl	%r9d,%r12d
	vpxor	%xmm5,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%edx,%r13d
	vpsrldq	$8,%xmm7,%xmm7
	addl	36(%rsp),%r10d
	movl	%r11d,%esi
	rorl	$11,%r14d
	xorl	%r9d,%r12d
	vpaddd	%xmm7,%xmm2,%xmm2
	xorl	%eax,%esi
	rorl	$6,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
.byte	143,232,120,194,250,13	# vprotd $13,%xmm2,%xmm7
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	vpsrld	$10,%xmm2,%xmm6
	xorl	%eax,%r15d
	addl	%r10d,%ecx
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$2,%r14d
	addl	%r15d,%r10d
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r10d
	vpxor	%xmm5,%xmm7,%xmm7
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	rorl	$9,%r14d
	xorl	%r8d,%r12d
	vpslldq	$8,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	vpaddd	%xmm7,%xmm2,%xmm2
	addl	40(%rsp),%r9d
	movl	%r10d,%r15d
	rorl	$11,%r14d
	xorl	%r8d,%r12d
	vpaddd	64(%rbp),%xmm2,%xmm6
	xorl	%r11d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	rorl	$2,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	rorl	$9,%r14d
	xorl	%edx,%r12d
	rorl	$5,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: first candidate result (final round with key at offset 160)
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	44(%rsp),%r8d
	movl	%r9d,%esi
	rorl	$11,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	rorl	$6,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	rorl	$2,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	vmovdqa	%xmm6,32(%rsp)			# W+K for the next pass, words 8..11
	# Rounds +12..+15: schedule update of %xmm3, W+K words at 48..60(%rsp).
	vpalignr	$4,%xmm3,%xmm0,%xmm4
	rorl	$14,%r13d
	movl	%r14d,%r8d
	vpalignr	$4,%xmm1,%xmm2,%xmm7
	movl	%ebx,%r12d
	xorl	%eax,%r13d
.byte	143,232,120,194,236,14	# vprotd $14,%xmm4,%xmm5
	rorl	$9,%r14d
	xorl	%ecx,%r12d
	vpsrld	$3,%xmm4,%xmm4
	rorl	$5,%r13d
	xorl	%r8d,%r14d
	vpaddd	%xmm7,%xmm3,%xmm3
	andl	%eax,%r12d
	vpand	%xmm12,%xmm11,%xmm8		# AES: keep the first candidate only where the %xmm12 mask is set
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	48(%rsp),%edx
	movl	%r8d,%r15d
.byte	143,232,120,194,245,11	# vprotd $11,%xmm5,%xmm6
	rorl	$11,%r14d
	xorl	%ecx,%r12d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%r9d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
.byte	143,232,120,194,250,13	# vprotd $13,%xmm2,%xmm7
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%r9d,%esi
	addl	%edx,%r11d
	vpsrld	$10,%xmm2,%xmm6
	rorl	$2,%r14d
	addl	%esi,%edx
	vpaddd	%xmm4,%xmm3,%xmm3
	movl	%r11d,%r13d
	addl	%edx,%r14d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$14,%r13d
	movl	%r14d,%edx
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	rorl	$9,%r14d
	xorl	%ebx,%r12d
	vpxor	%xmm5,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: second candidate result (key at offset 192)
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	vpsrldq	$8,%xmm7,%xmm7
	addl	52(%rsp),%ecx
	movl	%edx,%esi
	rorl	$11,%r14d
	xorl	%ebx,%r12d
	vpaddd	%xmm7,%xmm3,%xmm3
	xorl	%r8d,%esi
	rorl	$6,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
.byte	143,232,120,194,251,13	# vprotd $13,%xmm3,%xmm7
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	vpsrld	$10,%xmm3,%xmm6
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
.byte	143,232,120,194,239,2	# vprotd $2,%xmm7,%xmm5
	rorl	$2,%r14d
	addl	%r15d,%ecx
	vpxor	%xmm6,%xmm7,%xmm7
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ecx
	vpxor	%xmm5,%xmm7,%xmm7
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	rorl	$9,%r14d
	xorl	%eax,%r12d
	vpslldq	$8,%xmm7,%xmm7
	rorl	$5,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vpand	%xmm13,%xmm11,%xmm11		# AES: keep the second candidate only where the %xmm13 mask is set
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	vpaddd	%xmm7,%xmm3,%xmm3
	addl	56(%rsp),%ebx
	movl	%ecx,%r15d
	rorl	$11,%r14d
	xorl	%eax,%r12d
	vpaddd	96(%rbp),%xmm3,%xmm6
	xorl	%edx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	rorl	$2,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	rorl	$9,%r14d
	xorl	%r11d,%r12d
	rorl	$5,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: third candidate result (key at offset 224)
	vmovdqu	0-128(%rdi),%xmm10		# reload round key 0 for the next AES block
	xorl	%r9d,%r13d
	addl	60(%rsp),%eax
	movl	%ebx,%esi
	rorl	$11,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	rorl	$6,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	rorl	$2,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	vmovdqa	%xmm6,48(%rsp)			# W+K for the next pass, words 12..15
	movq	64+0(%rsp),%r12			# restore the AES input pointer
	vpand	%xmm14,%xmm11,%xmm11		# AES: keep the third candidate only where the %xmm14 mask is set
	movq	64+8(%rsp),%r15			# output minus input offset
	vpor	%xmm11,%xmm8,%xmm8		# %xmm8 = selected ciphertext block
	vmovdqu	%xmm8,(%r15,%r12,1)		# store the ciphertext block
	leaq	16(%r12),%r12			# advance the input pointer by one AES block
	cmpb	$0,131(%rbp)			# zero once the constant table is exhausted
	jne	L$xop_00_47
	# Fall-through from the L$xop_00_47 loop: the last sixteen SHA-256 rounds
	# of the 64-byte block.  No message schedule work is left, so only the
	# scalar rounds (W+K words from 0..60(%rsp)) and one more AES-CBC block
	# encryption are interleaved here.  Afterwards the working variables are
	# added into the hash state and the block loop repeats while the input
	# pointer is below the end pointer saved at 64+16(%rsp).
	vmovdqu	(%r12),%xmm9			# AES: load the next input block
	movq	%r12,64+0(%rsp)			# %r12 is reused as a scalar temporary below
	rorl	$14,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
	rorl	$9,%r14d
	xorl	%r10d,%r12d
	rorl	$5,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vpxor	%xmm10,%xmm9,%xmm9		# AES: input ^ round key 0
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	0(%rsp),%r11d
	movl	%eax,%r15d
	rorl	$11,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	addl	%r11d,%edx
	rorl	$2,%r14d
	addl	%esi,%r11d
	movl	%edx,%r13d
	addl	%r11d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	rorl	$9,%r14d
	xorl	%r9d,%r12d
	rorl	$5,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vpxor	%xmm8,%xmm9,%xmm9		# AES: CBC chaining, xor with previous ciphertext
	xorl	%edx,%r13d
	addl	4(%rsp),%r10d
	movl	%r11d,%esi
	rorl	$11,%r14d
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	rorl	$6,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	rorl	$2,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r10d
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	rorl	$9,%r14d
	xorl	%r8d,%r12d
	rorl	$5,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	8(%rsp),%r9d
	movl	%r10d,%r15d
	rorl	$11,%r14d
	xorl	%r8d,%r12d
	xorl	%r11d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	rorl	$2,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	rorl	$9,%r14d
	xorl	%edx,%r12d
	rorl	$5,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	12(%rsp),%r8d
	movl	%r9d,%esi
	rorl	$11,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	rorl	$6,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	rorl	$2,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	# Rounds +4..+7, W+K words at 16..28(%rsp).
	rorl	$14,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	xorl	%eax,%r13d
	rorl	$9,%r14d
	xorl	%ecx,%r12d
	rorl	$5,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	16(%rsp),%edx
	movl	%r8d,%r15d
	rorl	$11,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	addl	%edx,%r11d
	rorl	$2,%r14d
	addl	%esi,%edx
	movl	%r11d,%r13d
	addl	%edx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	rorl	$9,%r14d
	xorl	%ebx,%r12d
	rorl	$5,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	addl	20(%rsp),%ecx
	movl	%edx,%esi
	rorl	$11,%r14d
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	rorl	$6,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	rorl	$2,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ecx
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	rorl	$9,%r14d
	xorl	%eax,%r12d
	rorl	$5,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	24(%rsp),%ebx
	movl	%ecx,%r15d
	rorl	$11,%r14d
	xorl	%eax,%r12d
	xorl	%edx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	rorl	$2,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	rorl	$9,%r14d
	xorl	%r11d,%r12d
	rorl	$5,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	28(%rsp),%eax
	movl	%ebx,%esi
	rorl	$11,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	rorl	$6,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	rorl	$2,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	# Rounds +8..+11, W+K words at 32..44(%rsp).
	rorl	$14,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
	rorl	$9,%r14d
	xorl	%r10d,%r12d
	rorl	$5,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	32(%rsp),%r11d
	movl	%eax,%r15d
	rorl	$11,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	addl	%r11d,%edx
	rorl	$2,%r14d
	addl	%esi,%r11d
	movl	%edx,%r13d
	addl	%r11d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	rorl	$9,%r14d
	xorl	%r9d,%r12d
	rorl	$5,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%edx,%r13d
	addl	36(%rsp),%r10d
	movl	%r11d,%esi
	rorl	$11,%r14d
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	rorl	$6,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	rorl	$2,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r10d
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	rorl	$9,%r14d
	xorl	%r8d,%r12d
	rorl	$5,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	40(%rsp),%r9d
	movl	%r10d,%r15d
	rorl	$11,%r14d
	xorl	%r8d,%r12d
	xorl	%r11d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	rorl	$2,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	rorl	$14,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	rorl	$9,%r14d
	xorl	%edx,%r12d
	rorl	$5,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: first candidate result (key at offset 160)
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	44(%rsp),%r8d
	movl	%r9d,%esi
	rorl	$11,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	rorl	$6,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	rorl	$2,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	# Rounds +12..+15, W+K words at 48..60(%rsp).
	rorl	$14,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	xorl	%eax,%r13d
	rorl	$9,%r14d
	xorl	%ecx,%r12d
	rorl	$5,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vpand	%xmm12,%xmm11,%xmm8		# AES: keep the first candidate only where the %xmm12 mask is set
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	48(%rsp),%edx
	movl	%r8d,%r15d
	rorl	$11,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	rorl	$6,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	addl	%edx,%r11d
	rorl	$2,%r14d
	addl	%esi,%edx
	movl	%r11d,%r13d
	addl	%edx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	rorl	$9,%r14d
	xorl	%ebx,%r12d
	rorl	$5,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: second candidate result (key at offset 192)
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	addl	52(%rsp),%ecx
	movl	%edx,%esi
	rorl	$11,%r14d
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	rorl	$6,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	rorl	$2,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ecx
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	rorl	$9,%r14d
	xorl	%eax,%r12d
	rorl	$5,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vpand	%xmm13,%xmm11,%xmm11		# AES: keep the second candidate only where the %xmm13 mask is set
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	56(%rsp),%ebx
	movl	%ecx,%r15d
	rorl	$11,%r14d
	xorl	%eax,%r12d
	xorl	%edx,%r15d
	rorl	$6,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	rorl	$2,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	rorl	$14,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	rorl	$9,%r14d
	xorl	%r11d,%r12d
	rorl	$5,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11	# AES: third candidate result (key at offset 224)
	vmovdqu	0-128(%rdi),%xmm10		# reload round key 0 for the next AES block
	xorl	%r9d,%r13d
	addl	60(%rsp),%eax
	movl	%ebx,%esi
	rorl	$11,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	rorl	$6,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	rorl	$2,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	# End of the 64 rounds: reload the pointers parked in the frame.
	movq	64+0(%rsp),%r12			# AES input pointer
	movq	64+8(%rsp),%r13			# output minus input offset
	movq	64+40(%rsp),%r15		# hash state pointer
	movq	64+48(%rsp),%rsi		# hashed-data minus input offset

	vpand	%xmm14,%xmm11,%xmm11		# AES: keep the third candidate only where the %xmm14 mask is set
	movl	%r14d,%eax			# %r14d holds the final value of working variable a
	vpor	%xmm11,%xmm8,%xmm8		# %xmm8 = selected ciphertext block
	vmovdqu	%xmm8,(%r12,%r13,1)		# store the ciphertext block
	leaq	16(%r12),%r12			# advance the input pointer by one AES block

	# Add the working variables into the hash state.
	addl	0(%r15),%eax
	addl	4(%r15),%ebx
	addl	8(%r15),%ecx
	addl	12(%r15),%edx
	addl	16(%r15),%r8d
	addl	20(%r15),%r9d
	addl	24(%r15),%r10d
	addl	28(%r15),%r11d

	cmpq	64+16(%rsp),%r12		# flags survive the stores below and feed the jb

	movl	%eax,0(%r15)
	movl	%ebx,4(%r15)
	movl	%ecx,8(%r15)
	movl	%edx,12(%r15)
	movl	%r8d,16(%r15)
	movl	%r9d,20(%r15)
	movl	%r10d,24(%r15)
	movl	%r11d,28(%r15)

	jb	L$loop_xop			# more input: next 64-byte block

	# Exit path of the XOP variant: write the last ciphertext block back to
	# the IV buffer, clear the vector registers and restore the six
	# callee-saved registers from the area addressed by the %rsp value that
	# was saved at 64+56(%rsp) right after the pushes.
	movq	64+32(%rsp),%r8			# IV pointer
	movq	64+56(%rsp),%rsi		# %rsp as it was after the pushes
	vmovdqu	%xmm8,(%r8)			# last ciphertext block becomes the new IV
	vzeroall
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp			# drop the frame and the six saved registers
L$epilogue_xop:
	.byte	0xf3,0xc3			# rep ret


.p2align	6
aesni_cbc_sha256_enc_avx:
L$avx_shortcut:
	movq	8(%rsp),%r10
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%r11
	subq	$128,%rsp
	andq	$-64,%rsp

	shlq	$6,%rdx
	subq	%rdi,%rsi
	subq	%rdi,%r10
	addq	%rdi,%rdx


	movq	%rsi,64+8(%rsp)
	movq	%rdx,64+16(%rsp)

	movq	%r8,64+32(%rsp)
	movq	%r9,64+40(%rsp)
	movq	%r10,64+48(%rsp)
	movq	%r11,64+56(%rsp)
L$prologue_avx:
	vzeroall

	movq	%rdi,%r12
	leaq	128(%rcx),%rdi
	leaq	K256+544(%rip),%r13
	movl	240-128(%rdi),%r14d
	movq	%r9,%r15
	movq	%r10,%rsi
	vmovdqu	(%r8),%xmm8
	subq	$9,%r14

	movl	0(%r15),%eax
	movl	4(%r15),%ebx
	movl	8(%r15),%ecx
	movl	12(%r15),%edx
	movl	16(%r15),%r8d
	movl	20(%r15),%r9d
	movl	24(%r15),%r10d
	movl	28(%r15),%r11d

	vmovdqa	0(%r13,%r14,8),%xmm14
	vmovdqa	16(%r13,%r14,8),%xmm13
	vmovdqa	32(%r13,%r14,8),%xmm12
	vmovdqu	0-128(%rdi),%xmm10
	jmp	L$loop_avx
.p2align	4
L$loop_avx:
	vmovdqa	K256+512(%rip),%xmm7
	vmovdqu	0(%rsi,%r12,1),%xmm0
	vmovdqu	16(%rsi,%r12,1),%xmm1
	vmovdqu	32(%rsi,%r12,1),%xmm2
	vmovdqu	48(%rsi,%r12,1),%xmm3
	vpshufb	%xmm7,%xmm0,%xmm0
	leaq	K256(%rip),%rbp
	vpshufb	%xmm7,%xmm1,%xmm1
	vpshufb	%xmm7,%xmm2,%xmm2
	vpaddd	0(%rbp),%xmm0,%xmm4
	vpshufb	%xmm7,%xmm3,%xmm3
	vpaddd	32(%rbp),%xmm1,%xmm5
	vpaddd	64(%rbp),%xmm2,%xmm6
	vpaddd	96(%rbp),%xmm3,%xmm7
	vmovdqa	%xmm4,0(%rsp)
	movl	%eax,%r14d
	vmovdqa	%xmm5,16(%rsp)
	movl	%ebx,%esi
	vmovdqa	%xmm6,32(%rsp)
	xorl	%ecx,%esi
	vmovdqa	%xmm7,48(%rsp)
	movl	%r8d,%r13d
	jmp	L$avx_00_47

.p2align	4
L$avx_00_47:
	subq	$-32*4,%rbp
	vmovdqu	(%r12),%xmm9
	movq	%r12,64+0(%rsp)
	vpalignr	$4,%xmm0,%xmm1,%xmm4
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	vpalignr	$4,%xmm2,%xmm3,%xmm7
	xorl	%r8d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r10d,%r12d
	vpsrld	$7,%xmm4,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vpaddd	%xmm7,%xmm0,%xmm0
	vpxor	%xmm10,%xmm9,%xmm9
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	0(%rsp),%r11d
	movl	%eax,%r15d
	vpsrld	$3,%xmm4,%xmm7
	shrdl	$11,%r14d,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	vpslld	$14,%xmm4,%xmm5
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	vpxor	%xmm6,%xmm7,%xmm4
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	vpshufd	$250,%xmm3,%xmm7
	addl	%r11d,%edx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r11d
	vpsrld	$11,%xmm6,%xmm6
	movl	%edx,%r13d
	addl	%r11d,%r14d
	shrdl	$14,%r13d,%r13d
	vpxor	%xmm5,%xmm4,%xmm4
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	vpslld	$11,%xmm5,%xmm5
	shrdl	$9,%r14d,%r14d
	xorl	%r9d,%r12d
	shrdl	$5,%r13d,%r13d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vpxor	%xmm8,%xmm9,%xmm9
	xorl	%edx,%r13d
	vpsrld	$10,%xmm7,%xmm6
	addl	4(%rsp),%r10d
	movl	%r11d,%esi
	shrdl	$11,%r14d,%r14d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	shrdl	$6,%r13d,%r13d
	vpsrlq	$17,%xmm7,%xmm7
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	vpaddd	%xmm4,%xmm0,%xmm0
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	vpxor	%xmm7,%xmm6,%xmm6
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	vpsrlq	$2,%xmm7,%xmm7
	addl	%r10d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r10d
	vpxor	%xmm7,%xmm6,%xmm6
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	shrdl	$9,%r14d,%r14d
	vpshufd	$132,%xmm6,%xmm6
	xorl	%r8d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r10d,%r14d
	vpsrldq	$8,%xmm6,%xmm6
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	8(%rsp),%r9d
	vpaddd	%xmm6,%xmm0,%xmm0
	movl	%r10d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r8d,%r12d
	vpshufd	$80,%xmm0,%xmm7
	xorl	%r11d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r9d
	vpsrld	$10,%xmm7,%xmm6
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	vpsrlq	$17,%xmm7,%xmm7
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	shrdl	$2,%r14d,%r14d
	vpxor	%xmm7,%xmm6,%xmm6
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	vpsrlq	$2,%xmm7,%xmm7
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	vpxor	%xmm7,%xmm6,%xmm6
	xorl	%ebx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%edx,%r12d
	vpshufd	$232,%xmm6,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vpslldq	$8,%xmm6,%xmm6
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	12(%rsp),%r8d
	movl	%r9d,%esi
	vpaddd	%xmm6,%xmm0,%xmm0
	shrdl	$11,%r14d,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	vpaddd	0(%rbp),%xmm0,%xmm6
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	vmovdqa	%xmm6,0(%rsp)
	vpalignr	$4,%xmm1,%xmm2,%xmm4
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	vpalignr	$4,%xmm3,%xmm0,%xmm7
	xorl	%eax,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ecx,%r12d
	vpsrld	$7,%xmm4,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vpaddd	%xmm7,%xmm1,%xmm1
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	16(%rsp),%edx
	movl	%r8d,%r15d
	vpsrld	$3,%xmm4,%xmm7
	shrdl	$11,%r14d,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	vpslld	$14,%xmm4,%xmm5
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	vpxor	%xmm6,%xmm7,%xmm4
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	vpshufd	$250,%xmm0,%xmm7
	addl	%edx,%r11d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%edx
	vpsrld	$11,%xmm6,%xmm6
	movl	%r11d,%r13d
	addl	%edx,%r14d
	shrdl	$14,%r13d,%r13d
	vpxor	%xmm5,%xmm4,%xmm4
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	vpslld	$11,%xmm5,%xmm5
	shrdl	$9,%r14d,%r14d
	xorl	%ebx,%r12d
	shrdl	$5,%r13d,%r13d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	vpsrld	$10,%xmm7,%xmm6
	addl	20(%rsp),%ecx
	movl	%edx,%esi
	shrdl	$11,%r14d,%r14d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	shrdl	$6,%r13d,%r13d
	vpsrlq	$17,%xmm7,%xmm7
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	vpaddd	%xmm4,%xmm1,%xmm1
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	vpxor	%xmm7,%xmm6,%xmm6
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	vpsrlq	$2,%xmm7,%xmm7
	addl	%ecx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ecx
	vpxor	%xmm7,%xmm6,%xmm6
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	shrdl	$9,%r14d,%r14d
	vpshufd	$132,%xmm6,%xmm6
	xorl	%eax,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ecx,%r14d
	vpsrldq	$8,%xmm6,%xmm6
	andl	%r10d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	24(%rsp),%ebx
	vpaddd	%xmm6,%xmm1,%xmm1
	movl	%ecx,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%eax,%r12d
	vpshufd	$80,%xmm1,%xmm7
	xorl	%edx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ebx
	vpsrld	$10,%xmm7,%xmm6
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	vpsrlq	$17,%xmm7,%xmm7
	xorl	%edx,%esi
	addl	%ebx,%r9d
	shrdl	$2,%r14d,%r14d
	vpxor	%xmm7,%xmm6,%xmm6
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	vpsrlq	$2,%xmm7,%xmm7
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	vpxor	%xmm7,%xmm6,%xmm6
	xorl	%r9d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r11d,%r12d
	vpshufd	$232,%xmm6,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vpslldq	$8,%xmm6,%xmm6
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	28(%rsp),%eax
	movl	%ebx,%esi
	vpaddd	%xmm6,%xmm1,%xmm1
	shrdl	$11,%r14d,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	vpaddd	32(%rbp),%xmm1,%xmm6
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	vmovdqa	%xmm6,16(%rsp)
	vpalignr	$4,%xmm2,%xmm3,%xmm4
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	vpalignr	$4,%xmm0,%xmm1,%xmm7
	xorl	%r8d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r10d,%r12d
	vpsrld	$7,%xmm4,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vpaddd	%xmm7,%xmm2,%xmm2
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	32(%rsp),%r11d
	movl	%eax,%r15d
	vpsrld	$3,%xmm4,%xmm7
	shrdl	$11,%r14d,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	vpslld	$14,%xmm4,%xmm5
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	vpxor	%xmm6,%xmm7,%xmm4
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	vpshufd	$250,%xmm1,%xmm7
	addl	%r11d,%edx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r11d
	vpsrld	$11,%xmm6,%xmm6
	movl	%edx,%r13d
	addl	%r11d,%r14d
	shrdl	$14,%r13d,%r13d
	vpxor	%xmm5,%xmm4,%xmm4
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	vpslld	$11,%xmm5,%xmm5
	shrdl	$9,%r14d,%r14d
	xorl	%r9d,%r12d
	shrdl	$5,%r13d,%r13d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%edx,%r13d
	vpsrld	$10,%xmm7,%xmm6
	addl	36(%rsp),%r10d
	movl	%r11d,%esi
	shrdl	$11,%r14d,%r14d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	shrdl	$6,%r13d,%r13d
	vpsrlq	$17,%xmm7,%xmm7
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	vpaddd	%xmm4,%xmm2,%xmm2
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	vpxor	%xmm7,%xmm6,%xmm6
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	vpsrlq	$2,%xmm7,%xmm7
	addl	%r10d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r10d
	vpxor	%xmm7,%xmm6,%xmm6
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	shrdl	$9,%r14d,%r14d
	vpshufd	$132,%xmm6,%xmm6
	xorl	%r8d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r10d,%r14d
	vpsrldq	$8,%xmm6,%xmm6
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	40(%rsp),%r9d
	vpaddd	%xmm6,%xmm2,%xmm2
	movl	%r10d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r8d,%r12d
	vpshufd	$80,%xmm2,%xmm7
	xorl	%r11d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r9d
	vpsrld	$10,%xmm7,%xmm6
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	vpsrlq	$17,%xmm7,%xmm7
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	shrdl	$2,%r14d,%r14d
	vpxor	%xmm7,%xmm6,%xmm6
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	vpsrlq	$2,%xmm7,%xmm7
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	vpxor	%xmm7,%xmm6,%xmm6
	xorl	%ebx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%edx,%r12d
	vpshufd	$232,%xmm6,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vpslldq	$8,%xmm6,%xmm6
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	44(%rsp),%r8d
	movl	%r9d,%esi
	vpaddd	%xmm6,%xmm2,%xmm2
	shrdl	$11,%r14d,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	vpaddd	64(%rbp),%xmm2,%xmm6
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	vmovdqa	%xmm6,32(%rsp)
	vpalignr	$4,%xmm3,%xmm0,%xmm4
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	vpalignr	$4,%xmm1,%xmm2,%xmm7
	xorl	%eax,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ecx,%r12d
	vpsrld	$7,%xmm4,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vpaddd	%xmm7,%xmm3,%xmm3
	vpand	%xmm12,%xmm11,%xmm8
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	48(%rsp),%edx
	movl	%r8d,%r15d
	vpsrld	$3,%xmm4,%xmm7
	shrdl	$11,%r14d,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	vpslld	$14,%xmm4,%xmm5
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	vpxor	%xmm6,%xmm7,%xmm4
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	vpshufd	$250,%xmm2,%xmm7
	addl	%edx,%r11d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%edx
	vpsrld	$11,%xmm6,%xmm6
	movl	%r11d,%r13d
	addl	%edx,%r14d
	shrdl	$14,%r13d,%r13d
	vpxor	%xmm5,%xmm4,%xmm4
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	vpslld	$11,%xmm5,%xmm5
	shrdl	$9,%r14d,%r14d
	xorl	%ebx,%r12d
	shrdl	$5,%r13d,%r13d
	vpxor	%xmm6,%xmm4,%xmm4
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	vpsrld	$10,%xmm7,%xmm6
	addl	52(%rsp),%ecx
	movl	%edx,%esi
	shrdl	$11,%r14d,%r14d
	vpxor	%xmm5,%xmm4,%xmm4
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	shrdl	$6,%r13d,%r13d
	vpsrlq	$17,%xmm7,%xmm7
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	vpaddd	%xmm4,%xmm3,%xmm3
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	vpxor	%xmm7,%xmm6,%xmm6
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	vpsrlq	$2,%xmm7,%xmm7
	addl	%ecx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ecx
	vpxor	%xmm7,%xmm6,%xmm6
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	shrdl	$9,%r14d,%r14d
	vpshufd	$132,%xmm6,%xmm6
	xorl	%eax,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ecx,%r14d
	vpsrldq	$8,%xmm6,%xmm6
	andl	%r10d,%r12d
	vpand	%xmm13,%xmm11,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	56(%rsp),%ebx
	vpaddd	%xmm6,%xmm3,%xmm3
	movl	%ecx,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%eax,%r12d
	vpshufd	$80,%xmm3,%xmm7
	xorl	%edx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ebx
	vpsrld	$10,%xmm7,%xmm6
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	vpsrlq	$17,%xmm7,%xmm7
	xorl	%edx,%esi
	addl	%ebx,%r9d
	shrdl	$2,%r14d,%r14d
	vpxor	%xmm7,%xmm6,%xmm6
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	vpsrlq	$2,%xmm7,%xmm7
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	vpxor	%xmm7,%xmm6,%xmm6
	xorl	%r9d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r11d,%r12d
	vpshufd	$232,%xmm6,%xmm6
	shrdl	$5,%r13d,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vpslldq	$8,%xmm6,%xmm6
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11
	vmovdqu	0-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	60(%rsp),%eax
	movl	%ebx,%esi
	vpaddd	%xmm6,%xmm3,%xmm3
	shrdl	$11,%r14d,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	vpaddd	96(%rbp),%xmm3,%xmm6
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	vmovdqa	%xmm6,48(%rsp)
	movq	64+0(%rsp),%r12
	vpand	%xmm14,%xmm11,%xmm11
	movq	64+8(%rsp),%r15
	vpor	%xmm11,%xmm8,%xmm8
	vmovdqu	%xmm8,(%r15,%r12,1)
	leaq	16(%r12),%r12
	cmpb	$0,131(%rbp)
	jne	L$avx_00_47
	vmovdqu	(%r12),%xmm9
	movq	%r12,64+0(%rsp)
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r10d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vpxor	%xmm10,%xmm9,%xmm9
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	0(%rsp),%r11d
	movl	%eax,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	addl	%r11d,%edx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r11d
	movl	%edx,%r13d
	addl	%r11d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r9d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vpxor	%xmm8,%xmm9,%xmm9
	xorl	%edx,%r13d
	addl	4(%rsp),%r10d
	movl	%r11d,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r10d
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r8d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	8(%rsp),%r9d
	movl	%r10d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r8d,%r12d
	xorl	%r11d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%edx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	12(%rsp),%r8d
	movl	%r9d,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	xorl	%eax,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ecx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	16(%rsp),%edx
	movl	%r8d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	addl	%edx,%r11d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%edx
	movl	%r11d,%r13d
	addl	%edx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ebx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	addl	20(%rsp),%ecx
	movl	%edx,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ecx
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%eax,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	24(%rsp),%ebx
	movl	%ecx,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%eax,%r12d
	xorl	%edx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r11d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	28(%rsp),%eax
	movl	%ebx,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%eax
	movl	%r9d,%r12d
	xorl	%r8d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r10d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%eax,%r14d
	andl	%r8d,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r8d,%r13d
	addl	32(%rsp),%r11d
	movl	%eax,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r10d,%r12d
	xorl	%ebx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r11d
	andl	%r15d,%esi
	xorl	%eax,%r14d
	addl	%r13d,%r11d
	xorl	%ebx,%esi
	addl	%r11d,%edx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r11d
	movl	%edx,%r13d
	addl	%r11d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r11d
	movl	%r8d,%r12d
	xorl	%edx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r9d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r11d,%r14d
	andl	%edx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%edx,%r13d
	addl	36(%rsp),%r10d
	movl	%r11d,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%r9d,%r12d
	xorl	%eax,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r10d
	andl	%esi,%r15d
	xorl	%r11d,%r14d
	addl	%r13d,%r10d
	xorl	%eax,%r15d
	addl	%r10d,%ecx
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r10d
	movl	%ecx,%r13d
	addl	%r10d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r10d
	movl	%edx,%r12d
	xorl	%ecx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r8d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r10d,%r14d
	andl	%ecx,%r12d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%ecx,%r13d
	addl	40(%rsp),%r9d
	movl	%r10d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%r8d,%r12d
	xorl	%r11d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r9d
	andl	%r15d,%esi
	xorl	%r10d,%r14d
	addl	%r13d,%r9d
	xorl	%r11d,%esi
	addl	%r9d,%ebx
	shrdl	$2,%r14d,%r14d
	addl	%esi,%r9d
	movl	%ebx,%r13d
	addl	%r9d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r9d
	movl	%ecx,%r12d
	xorl	%ebx,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%edx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r9d,%r14d
	andl	%ebx,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%ebx,%r13d
	addl	44(%rsp),%r8d
	movl	%r9d,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%edx,%r12d
	xorl	%r10d,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%r8d
	andl	%esi,%r15d
	xorl	%r9d,%r14d
	addl	%r13d,%r8d
	xorl	%r10d,%r15d
	addl	%r8d,%eax
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%r8d
	movl	%eax,%r13d
	addl	%r8d,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%r8d
	movl	%ebx,%r12d
	xorl	%eax,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ecx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%r8d,%r14d
	andl	%eax,%r12d
	vpand	%xmm12,%xmm11,%xmm8
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%eax,%r13d
	addl	48(%rsp),%edx
	movl	%r8d,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%ecx,%r12d
	xorl	%r9d,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%edx
	andl	%r15d,%esi
	xorl	%r8d,%r14d
	addl	%r13d,%edx
	xorl	%r9d,%esi
	addl	%edx,%r11d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%edx
	movl	%r11d,%r13d
	addl	%edx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%edx
	movl	%eax,%r12d
	xorl	%r11d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%ebx,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%edx,%r14d
	andl	%r11d,%r12d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r11d,%r13d
	addl	52(%rsp),%ecx
	movl	%edx,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%ebx,%r12d
	xorl	%r8d,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ecx
	andl	%esi,%r15d
	xorl	%edx,%r14d
	addl	%r13d,%ecx
	xorl	%r8d,%r15d
	addl	%ecx,%r10d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%ecx
	movl	%r10d,%r13d
	addl	%ecx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ecx
	movl	%r11d,%r12d
	xorl	%r10d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%eax,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ecx,%r14d
	andl	%r10d,%r12d
	vpand	%xmm13,%xmm11,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r10d,%r13d
	addl	56(%rsp),%ebx
	movl	%ecx,%r15d
	shrdl	$11,%r14d,%r14d
	xorl	%eax,%r12d
	xorl	%edx,%r15d
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%ebx
	andl	%r15d,%esi
	xorl	%ecx,%r14d
	addl	%r13d,%ebx
	xorl	%edx,%esi
	addl	%ebx,%r9d
	shrdl	$2,%r14d,%r14d
	addl	%esi,%ebx
	movl	%r9d,%r13d
	addl	%ebx,%r14d
	shrdl	$14,%r13d,%r13d
	movl	%r14d,%ebx
	movl	%r10d,%r12d
	xorl	%r9d,%r13d
	shrdl	$9,%r14d,%r14d
	xorl	%r11d,%r12d
	shrdl	$5,%r13d,%r13d
	xorl	%ebx,%r14d
	andl	%r9d,%r12d
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11
	vmovdqu	0-128(%rdi),%xmm10
	xorl	%r9d,%r13d
	addl	60(%rsp),%eax
	movl	%ebx,%esi
	shrdl	$11,%r14d,%r14d
	xorl	%r11d,%r12d
	xorl	%ecx,%esi
	shrdl	$6,%r13d,%r13d
	addl	%r12d,%eax
	andl	%esi,%r15d
	xorl	%ebx,%r14d
	addl	%r13d,%eax
	xorl	%ecx,%r15d
	addl	%eax,%r8d
	shrdl	$2,%r14d,%r14d
	addl	%r15d,%eax
	movl	%r8d,%r13d
	addl	%eax,%r14d
	movq	64+0(%rsp),%r12
	movq	64+8(%rsp),%r13
	movq	64+40(%rsp),%r15
	movq	64+48(%rsp),%rsi

	vpand	%xmm14,%xmm11,%xmm11
	movl	%r14d,%eax
	vpor	%xmm11,%xmm8,%xmm8
	vmovdqu	%xmm8,(%r12,%r13,1)
	leaq	16(%r12),%r12

	addl	0(%r15),%eax
	addl	4(%r15),%ebx
	addl	8(%r15),%ecx
	addl	12(%r15),%edx
	addl	16(%r15),%r8d
	addl	20(%r15),%r9d
	addl	24(%r15),%r10d
	addl	28(%r15),%r11d

	cmpq	64+16(%rsp),%r12

	movl	%eax,0(%r15)
	movl	%ebx,4(%r15)
	movl	%ecx,8(%r15)
	movl	%edx,12(%r15)
	movl	%r8d,16(%r15)
	movl	%r9d,20(%r15)
	movl	%r10d,24(%r15)
	movl	%r11d,28(%r15)
	jb	L$loop_avx

	movq	64+32(%rsp),%r8
	movq	64+56(%rsp),%rsi
	vmovdqu	%xmm8,(%r8)
	vzeroall
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
# L$epilogue_avx -- return point of the AVX code path, reached by falling
# through from the register-restore sequence directly above.
L$epilogue_avx:
	# raw bytes F3 C3: a ret carrying a rep prefix ("rep ret")
	.byte	0xf3,0xc3


.p2align	6
# aesni_cbc_sha256_enc_avx2 -- entry and frame setup of the AVX2 code path.
# The dispatcher at the top of the file jumps (not calls) here, so %rsp is
# still as it was on entry to _aesni_cbc_sha256_enc.
#
# Register roles below are read off how each value is used in this block;
# the C prototype is not visible here -- TODO confirm against the caller:
#   %rdi     input pointer (copied to %r13, advanced by 64, compared to end)
#   %rsi     second buffer pointer, kept only as an offset relative to %rdi
#   %rdx     number of 64-byte units (scaled by 64 and added to %rdi)
#   %rcx     AES key schedule; dword at 240(%rcx) is presumably the round count
#   %r8      pointer to a 16-byte block loaded into %xmm8
#   %r9      pointer to eight 32-bit state words (loaded into %eax..%r11d)
#   8(%rsp)  first stack-passed argument, also kept relative to %rdi
aesni_cbc_sha256_enc_avx2:
L$avx2_shortcut:
	# fetch the stack argument before the pushes move %rsp
	movq	8(%rsp),%r10
	# save the callee-saved registers this path overwrites
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	# %r11 = %rsp after the pushes; stored at 64+56(%rsp) below
	movq	%rsp,%r11
	# carve out the frame: reserve 576 bytes, round down to a 1024-byte
	# boundary, then step up 448, so %rsp ends at (multiple of 1024) + 448
	subq	$576,%rsp
	andq	$-1024,%rsp
	addq	$448,%rsp

	# %rdx = %rdi + 64*%rdx (end pointer); %rsi and %r10 become offsets
	# relative to %rdi
	shlq	$6,%rdx
	subq	%rdi,%rsi
	subq	%rdi,%r10
	addq	%rdi,%rdx



	# spill values needed later into the frame at 64+N(%rsp)
	movq	%rdx,64+16(%rsp)

	movq	%r8,64+32(%rsp)
	movq	%r9,64+40(%rsp)
	movq	%r10,64+48(%rsp)
	movq	%r11,64+56(%rsp)
L$prologue_avx2:
	vzeroall

	movq	%rdi,%r13
	# park the %rsi offset in the high qword of %xmm15
	vpinsrq	$1,%rsi,%xmm15,%xmm15
	# bias the key pointer by 128 so round keys are addressed as N-128(%rdi)
	leaq	128(%rcx),%rdi
	leaq	K256+544(%rip),%r12
	# dword at 240(%rcx) -- presumably the AES round count (TODO confirm)
	movl	240-128(%rdi),%r14d
	movq	%r9,%r15
	movq	%r10,%rsi
	vmovdqu	(%r8),%xmm8
	leaq	-9(%r14),%r14

	# three 16-byte masks taken from K256+544 at byte offset 8*(%r14);
	# they are and-ed with the three vaesenclast results in the round loop
	vmovdqa	0(%r12,%r14,8),%xmm14
	vmovdqa	16(%r12,%r14,8),%xmm13
	vmovdqa	32(%r12,%r14,8),%xmm12

	# %r13 += 64; load the eight state words; %r12 = %rsi + %r13, replaced
	# by %rsp when %r13 equals the end pointer (the movl/leaq instructions
	# between cmpq and cmoveq leave the flags untouched)
	subq	$-64,%r13
	movl	0(%r15),%eax
	leaq	(%rsi,%r13,1),%r12
	movl	4(%r15),%ebx
	cmpq	%rdx,%r13
	movl	8(%r15),%ecx
	cmoveq	%rsp,%r12
	movl	12(%r15),%edx
	movl	16(%r15),%r8d
	movl	20(%r15),%r9d
	movl	24(%r15),%r10d
	movl	28(%r15),%r11d
	# preload the first round key (16 bytes at the start of the schedule)
	vmovdqu	0-128(%rdi),%xmm10
	jmp	L$oop_avx2
.p2align	4
# L$oop_avx2 -- per-pass setup before the round loop.
# Loads 64 bytes from -64(%rsi,%r13) into the low 128-bit lanes of
# %ymm0-%ymm3 and 64 bytes from (%r12) into the high lanes, shuffles both
# with the mask at K256+512, adds the first four 32-byte rows of K256 and
# spills the sums to the stack for the integer rounds to consume.
L$oop_avx2:
	# byte-shuffle mask stored 512 bytes into the K256 table
	vmovdqa	K256+512(%rip),%ymm7
	vmovdqu	-64+0(%rsi,%r13,1),%xmm0
	vmovdqu	-64+16(%rsi,%r13,1),%xmm1
	vmovdqu	-64+32(%rsi,%r13,1),%xmm2
	vmovdqu	-64+48(%rsi,%r13,1),%xmm3

	# high lanes come from (%r12); on first entry that is either
	# %rsi+%r13 or %rsp, as chosen by the cmoveq in the prologue
	vinserti128	$1,(%r12),%ymm0,%ymm0
	vinserti128	$1,16(%r12),%ymm1,%ymm1
	vpshufb	%ymm7,%ymm0,%ymm0
	vinserti128	$1,32(%r12),%ymm2,%ymm2
	vpshufb	%ymm7,%ymm1,%ymm1
	vinserti128	$1,48(%r12),%ymm3,%ymm3

	# %rbp = base of the constant table
	leaq	K256(%rip),%rbp
	vpshufb	%ymm7,%ymm2,%ymm2
	# undo the +64 bias on %r13
	leaq	-64(%r13),%r13
	vpaddd	0(%rbp),%ymm0,%ymm4
	vpshufb	%ymm7,%ymm3,%ymm3
	vpaddd	32(%rbp),%ymm1,%ymm5
	vpaddd	64(%rbp),%ymm2,%ymm6
	vpaddd	96(%rbp),%ymm3,%ymm7
	# first two sums go above the new stack top, last two below it:
	# %rsp drops by 64 between the two pairs of stores
	vmovdqa	%ymm4,0(%rsp)
	xorl	%r14d,%r14d
	vmovdqa	%ymm5,32(%rsp)
	leaq	-64(%rsp),%rsp
	# seed the round scratch registers: %esi = %ebx ^ %ecx, %r12d = %r9d,
	# %r14d = 0
	movl	%ebx,%esi
	vmovdqa	%ymm6,0(%rsp)
	xorl	%ecx,%esi
	vmovdqa	%ymm7,32(%rsp)
	movl	%r9d,%r12d
	# %rbp += 128: skip the four table rows consumed above
	subq	$-32*4,%rbp
	jmp	L$avx2_00_47

.p2align	4
# L$avx2_00_47 -- inner loop; each pass interleaves three instruction streams:
#   * 16 integer rounds on %eax,%ebx,%ecx,%edx,%r8d-%r11d, using rorx by
#     25/11/6 and 22/13/2 (the SHA-256 Sigma1/Sigma0 rotation amounts) and
#     andn, consuming dwords 0-3 of each 32-byte row spilled on the stack;
#   * AVX2 updates of %ymm0-%ymm3 (shift/xor by 7,14,3,11 and 10,17,2 -- this
#     looks like the SHA-256 message schedule), re-added to the table rows at
#     0/32/64/96(%rbp) and spilled to the stack for later rounds;
#   * one 16-byte AES encryption in %xmm9 with round keys streamed through
#     %xmm10 from N-128(%rdi), stored to (%r15,%r13) at the bottom.
# %rsp drops by 64 bytes twice per pass, %rbp advances by 128 per pass, and
# the loop repeats while the byte at 3(%rbp) is nonzero.
L$avx2_00_47:
	# fetch the next 16-byte block; %r13 is parked in the low qword of
	# %xmm15 because %r13d is reused as round scratch below
	vmovdqu	(%r13),%xmm9
	vpinsrq	$0,%r13,%xmm15,%xmm15
	leaq	-64(%rsp),%rsp
	vpalignr	$4,%ymm0,%ymm1,%ymm4
	addl	0+128(%rsp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	vpalignr	$4,%ymm2,%ymm3,%ymm7
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	vpsrld	$7,%ymm4,%ymm6
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	vpaddd	%ymm7,%ymm0,%ymm0
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	vpsrld	$3,%ymm4,%ymm7
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	vpslld	$14,%ymm4,%ymm5
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	vpxor	%ymm6,%ymm7,%ymm4
	andl	%r15d,%esi
	# AES: xor the block with round key 0, then preload the next round key
	vpxor	%xmm10,%xmm9,%xmm9
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	vpshufd	$250,%ymm3,%ymm7
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	vpsrld	$11,%ymm6,%ymm6
	addl	4+128(%rsp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	vpslld	$11,%ymm5,%ymm5
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	vpxor	%ymm6,%ymm4,%ymm4
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	vpsrld	$10,%ymm7,%ymm6
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	vpsrlq	$17,%ymm7,%ymm7
	andl	%esi,%r15d
	# AES: fold in %xmm8 (the block written out at the bottom of this
	# loop) before the first vaesenc -- CBC-style chaining
	vpxor	%xmm8,%xmm9,%xmm9
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	vpaddd	%ymm4,%ymm0,%ymm0
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	vpxor	%ymm7,%ymm6,%ymm6
	addl	8+128(%rsp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	vpsrlq	$2,%ymm7,%ymm7
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	vpxor	%ymm7,%ymm6,%ymm6
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	vpshufd	$132,%ymm6,%ymm6
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	vpsrldq	$8,%ymm6,%ymm6
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	vpaddd	%ymm6,%ymm0,%ymm0
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	vpshufd	$80,%ymm0,%ymm7
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	vpsrld	$10,%ymm7,%ymm6
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	vpsrlq	$17,%ymm7,%ymm7
	addl	12+128(%rsp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	vpxor	%ymm7,%ymm6,%ymm6
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	vpsrlq	$2,%ymm7,%ymm7
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	vpxor	%ymm7,%ymm6,%ymm6
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	vpshufd	$232,%ymm6,%ymm6
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	vpslldq	$8,%ymm6,%ymm6
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	vpaddd	%ymm6,%ymm0,%ymm0
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	# updated %ymm0 plus the table row at 0(%rbp), spilled for later rounds
	vpaddd	0(%rbp),%ymm0,%ymm6
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	vmovdqa	%ymm6,0(%rsp)
	vpalignr	$4,%ymm1,%ymm2,%ymm4
	addl	32+128(%rsp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	vpalignr	$4,%ymm3,%ymm0,%ymm7
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	vpsrld	$7,%ymm4,%ymm6
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	vpaddd	%ymm7,%ymm1,%ymm1
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	vpsrld	$3,%ymm4,%ymm7
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	vpslld	$14,%ymm4,%ymm5
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	vpxor	%ymm6,%ymm7,%ymm4
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	vpshufd	$250,%ymm0,%ymm7
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	vpsrld	$11,%ymm6,%ymm6
	addl	36+128(%rsp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	vpslld	$11,%ymm5,%ymm5
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	vpxor	%ymm6,%ymm4,%ymm4
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	vpsrld	$10,%ymm7,%ymm6
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	vpsrlq	$17,%ymm7,%ymm7
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	vpaddd	%ymm4,%ymm1,%ymm1
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	vpxor	%ymm7,%ymm6,%ymm6
	addl	40+128(%rsp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	vpsrlq	$2,%ymm7,%ymm7
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	vpxor	%ymm7,%ymm6,%ymm6
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	vpshufd	$132,%ymm6,%ymm6
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	vpsrldq	$8,%ymm6,%ymm6
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	vpaddd	%ymm6,%ymm1,%ymm1
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	vpshufd	$80,%ymm1,%ymm7
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	vpsrld	$10,%ymm7,%ymm6
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	vpsrlq	$17,%ymm7,%ymm7
	addl	44+128(%rsp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	vpxor	%ymm7,%ymm6,%ymm6
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	vpsrlq	$2,%ymm7,%ymm7
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	vpxor	%ymm7,%ymm6,%ymm6
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	vpshufd	$232,%ymm6,%ymm6
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	vpslldq	$8,%ymm6,%ymm6
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	vpaddd	%ymm6,%ymm1,%ymm1
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	vpaddd	32(%rbp),%ymm1,%ymm6
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	vmovdqa	%ymm6,32(%rsp)
	# second half of the pass: drop %rsp by another 64 bytes and run the
	# next eight rounds while %ymm2 and %ymm3 are updated
	leaq	-64(%rsp),%rsp
	vpalignr	$4,%ymm2,%ymm3,%ymm4
	addl	0+128(%rsp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	vpalignr	$4,%ymm0,%ymm1,%ymm7
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	vpsrld	$7,%ymm4,%ymm6
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	vpaddd	%ymm7,%ymm2,%ymm2
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	vpsrld	$3,%ymm4,%ymm7
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	vpslld	$14,%ymm4,%ymm5
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	vpxor	%ymm6,%ymm7,%ymm4
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	vpshufd	$250,%ymm1,%ymm7
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	vpsrld	$11,%ymm6,%ymm6
	addl	4+128(%rsp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	vpslld	$11,%ymm5,%ymm5
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	vpxor	%ymm6,%ymm4,%ymm4
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	vpsrld	$10,%ymm7,%ymm6
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	vpsrlq	$17,%ymm7,%ymm7
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	vpaddd	%ymm4,%ymm2,%ymm2
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	vpxor	%ymm7,%ymm6,%ymm6
	addl	8+128(%rsp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	vpsrlq	$2,%ymm7,%ymm7
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	vpxor	%ymm7,%ymm6,%ymm6
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	vpshufd	$132,%ymm6,%ymm6
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	vpsrldq	$8,%ymm6,%ymm6
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	vpaddd	%ymm6,%ymm2,%ymm2
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	vpshufd	$80,%ymm2,%ymm7
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	vpsrld	$10,%ymm7,%ymm6
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	vpsrlq	$17,%ymm7,%ymm7
	addl	12+128(%rsp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	vpxor	%ymm7,%ymm6,%ymm6
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	vpsrlq	$2,%ymm7,%ymm7
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	vpxor	%ymm7,%ymm6,%ymm6
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	vpshufd	$232,%ymm6,%ymm6
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	vpslldq	$8,%ymm6,%ymm6
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	vpaddd	%ymm6,%ymm2,%ymm2
	andl	%esi,%r15d
	# AES: first candidate final round (key at 160-128(%rdi)) goes to
	# %xmm11 while %xmm9 continues with an ordinary round on the same key
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	vpaddd	64(%rbp),%ymm2,%ymm6
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	vmovdqa	%ymm6,0(%rsp)
	vpalignr	$4,%ymm3,%ymm0,%ymm4
	addl	32+128(%rsp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	vpalignr	$4,%ymm1,%ymm2,%ymm7
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	vpsrld	$7,%ymm4,%ymm6
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	vpaddd	%ymm7,%ymm3,%ymm3
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	vpsrld	$3,%ymm4,%ymm7
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	vpslld	$14,%ymm4,%ymm5
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	vpxor	%ymm6,%ymm7,%ymm4
	andl	%r15d,%esi
	# keep the first candidate only where mask %xmm12 is set
	vpand	%xmm12,%xmm11,%xmm8
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	vpshufd	$250,%ymm2,%ymm7
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	vpsrld	$11,%ymm6,%ymm6
	addl	36+128(%rsp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	vpslld	$11,%ymm5,%ymm5
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	vpxor	%ymm6,%ymm4,%ymm4
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	vpsrld	$10,%ymm7,%ymm6
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	vpxor	%ymm5,%ymm4,%ymm4
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	vpsrlq	$17,%ymm7,%ymm7
	andl	%esi,%r15d
	# AES: second candidate final round (key at 192-128(%rdi))
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	vpaddd	%ymm4,%ymm3,%ymm3
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	vpxor	%ymm7,%ymm6,%ymm6
	addl	40+128(%rsp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	vpsrlq	$2,%ymm7,%ymm7
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	vpxor	%ymm7,%ymm6,%ymm6
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	vpshufd	$132,%ymm6,%ymm6
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	vpsrldq	$8,%ymm6,%ymm6
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	vpaddd	%ymm6,%ymm3,%ymm3
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	vpshufd	$80,%ymm3,%ymm7
	andl	%r15d,%esi
	# keep the second candidate only where mask %xmm13 is set
	vpand	%xmm13,%xmm11,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	vpsrld	$10,%ymm7,%ymm6
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	vpsrlq	$17,%ymm7,%ymm7
	addl	44+128(%rsp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	vpxor	%ymm7,%ymm6,%ymm6
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	vpsrlq	$2,%ymm7,%ymm7
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	vpxor	%ymm7,%ymm6,%ymm6
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	vpshufd	$232,%ymm6,%ymm6
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	vpslldq	$8,%ymm6,%ymm6
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	vpaddd	%ymm6,%ymm3,%ymm3
	andl	%esi,%r15d
	# merge candidates one and two, compute the third candidate final
	# round (key at 224-128(%rdi)), then reload round key 0 for the
	# next block
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11
	vmovdqu	0-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	vpaddd	96(%rbp),%ymm3,%ymm6
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	vmovdqa	%ymm6,32(%rsp)
	# recover the block pointer (low qword) and the buffer offset (high
	# qword) parked in %xmm15
	vmovq	%xmm15,%r13
	vpextrq	$1,%xmm15,%r15
	# mask the third candidate with %xmm14 and merge; presumably exactly
	# one of the three masks is all-ones for a given key size (TODO confirm
	# against the table at K256+544), leaving the finished block in %xmm8
	vpand	%xmm14,%xmm11,%xmm11
	vpor	%xmm11,%xmm8,%xmm8
	vmovdqu	%xmm8,(%r15,%r13,1)
	# advance to the next 16-byte block and the next four table rows
	leaq	16(%r13),%r13
	leaq	128(%rbp),%rbp
	# repeat while byte 3 of the next table entry is nonzero (presumably
	# the data placed after the round constants has a zero there)
	cmpb	$0,3(%rbp)
	jne	L$avx2_00_47
	vmovdqu	(%r13),%xmm9
	vpinsrq	$0,%r13,%xmm15,%xmm15
	addl	0+64(%rsp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	andl	%r15d,%esi
	vpxor	%xmm10,%xmm9,%xmm9
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	addl	4+64(%rsp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	andl	%esi,%r15d
	vpxor	%xmm8,%xmm9,%xmm9
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	addl	8+64(%rsp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	addl	12+64(%rsp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	addl	32+64(%rsp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	addl	36+64(%rsp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	addl	40+64(%rsp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	addl	44+64(%rsp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	addl	0(%rsp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	addl	4(%rsp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	addl	8(%rsp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	addl	12(%rsp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	andl	%esi,%r15d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	addl	32(%rsp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	andl	%r15d,%esi
	vpand	%xmm12,%xmm11,%xmm8
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	addl	36(%rsp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	andl	%esi,%r15d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	addl	40(%rsp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	andl	%r15d,%esi
	vpand	%xmm13,%xmm11,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	addl	44(%rsp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	andl	%esi,%r15d
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11
	vmovdqu	0-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	vpextrq	$1,%xmm15,%r12
	vmovq	%xmm15,%r13
	movq	552(%rsp),%r15
	addl	%r14d,%eax
	leaq	448(%rsp),%rbp

	vpand	%xmm14,%xmm11,%xmm11
	vpor	%xmm11,%xmm8,%xmm8
	vmovdqu	%xmm8,(%r12,%r13,1)
	leaq	16(%r13),%r13

	addl	0(%r15),%eax
	addl	4(%r15),%ebx
	addl	8(%r15),%ecx
	addl	12(%r15),%edx
	addl	16(%r15),%r8d
	addl	20(%r15),%r9d
	addl	24(%r15),%r10d
	addl	28(%r15),%r11d

	movl	%eax,0(%r15)
	movl	%ebx,4(%r15)
	movl	%ecx,8(%r15)
	movl	%edx,12(%r15)
	movl	%r8d,16(%r15)
	movl	%r9d,20(%r15)
	movl	%r10d,24(%r15)
	movl	%r11d,28(%r15)

	cmpq	80(%rbp),%r13
	je	L$done_avx2

	xorl	%r14d,%r14d
	movl	%ebx,%esi
	movl	%r9d,%r12d
	xorl	%ecx,%esi
	jmp	L$ower_avx2
.p2align	4
L$ower_avx2:
	vmovdqu	(%r13),%xmm9
	vpinsrq	$0,%r13,%xmm15,%xmm15
	addl	0+16(%rbp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	andl	%r15d,%esi
	vpxor	%xmm10,%xmm9,%xmm9
	vmovdqu	16-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	addl	4+16(%rbp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	andl	%esi,%r15d
	vpxor	%xmm8,%xmm9,%xmm9
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	addl	8+16(%rbp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	32-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	addl	12+16(%rbp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	48-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	addl	32+16(%rbp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	64-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	addl	36+16(%rbp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	80-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	addl	40+16(%rbp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	96-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	addl	44+16(%rbp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	112-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	leaq	-64(%rbp),%rbp
	addl	0+16(%rbp),%r11d
	andl	%r8d,%r12d
	rorxl	$25,%r8d,%r13d
	rorxl	$11,%r8d,%r15d
	leal	(%rax,%r14,1),%eax
	leal	(%r11,%r12,1),%r11d
	andnl	%r10d,%r8d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r8d,%r14d
	leal	(%r11,%r12,1),%r11d
	xorl	%r14d,%r13d
	movl	%eax,%r15d
	rorxl	$22,%eax,%r12d
	leal	(%r11,%r13,1),%r11d
	xorl	%ebx,%r15d
	rorxl	$13,%eax,%r14d
	rorxl	$2,%eax,%r13d
	leal	(%rdx,%r11,1),%edx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	128-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ebx,%esi
	xorl	%r13d,%r14d
	leal	(%r11,%rsi,1),%r11d
	movl	%r8d,%r12d
	addl	4+16(%rbp),%r10d
	andl	%edx,%r12d
	rorxl	$25,%edx,%r13d
	rorxl	$11,%edx,%esi
	leal	(%r11,%r14,1),%r11d
	leal	(%r10,%r12,1),%r10d
	andnl	%r9d,%edx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%edx,%r14d
	leal	(%r10,%r12,1),%r10d
	xorl	%r14d,%r13d
	movl	%r11d,%esi
	rorxl	$22,%r11d,%r12d
	leal	(%r10,%r13,1),%r10d
	xorl	%eax,%esi
	rorxl	$13,%r11d,%r14d
	rorxl	$2,%r11d,%r13d
	leal	(%rcx,%r10,1),%ecx
	andl	%esi,%r15d
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	144-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%eax,%r15d
	xorl	%r13d,%r14d
	leal	(%r10,%r15,1),%r10d
	movl	%edx,%r12d
	addl	8+16(%rbp),%r9d
	andl	%ecx,%r12d
	rorxl	$25,%ecx,%r13d
	rorxl	$11,%ecx,%r15d
	leal	(%r10,%r14,1),%r10d
	leal	(%r9,%r12,1),%r9d
	andnl	%r8d,%ecx,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%ecx,%r14d
	leal	(%r9,%r12,1),%r9d
	xorl	%r14d,%r13d
	movl	%r10d,%r15d
	rorxl	$22,%r10d,%r12d
	leal	(%r9,%r13,1),%r9d
	xorl	%r11d,%r15d
	rorxl	$13,%r10d,%r14d
	rorxl	$2,%r10d,%r13d
	leal	(%rbx,%r9,1),%ebx
	andl	%r15d,%esi
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	160-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r11d,%esi
	xorl	%r13d,%r14d
	leal	(%r9,%rsi,1),%r9d
	movl	%ecx,%r12d
	addl	12+16(%rbp),%r8d
	andl	%ebx,%r12d
	rorxl	$25,%ebx,%r13d
	rorxl	$11,%ebx,%esi
	leal	(%r9,%r14,1),%r9d
	leal	(%r8,%r12,1),%r8d
	andnl	%edx,%ebx,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%ebx,%r14d
	leal	(%r8,%r12,1),%r8d
	xorl	%r14d,%r13d
	movl	%r9d,%esi
	rorxl	$22,%r9d,%r12d
	leal	(%r8,%r13,1),%r8d
	xorl	%r10d,%esi
	rorxl	$13,%r9d,%r14d
	rorxl	$2,%r9d,%r13d
	leal	(%rax,%r8,1),%eax
	andl	%esi,%r15d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	176-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r10d,%r15d
	xorl	%r13d,%r14d
	leal	(%r8,%r15,1),%r8d
	movl	%ebx,%r12d
	addl	32+16(%rbp),%edx
	andl	%eax,%r12d
	rorxl	$25,%eax,%r13d
	rorxl	$11,%eax,%r15d
	leal	(%r8,%r14,1),%r8d
	leal	(%rdx,%r12,1),%edx
	andnl	%ecx,%eax,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%eax,%r14d
	leal	(%rdx,%r12,1),%edx
	xorl	%r14d,%r13d
	movl	%r8d,%r15d
	rorxl	$22,%r8d,%r12d
	leal	(%rdx,%r13,1),%edx
	xorl	%r9d,%r15d
	rorxl	$13,%r8d,%r14d
	rorxl	$2,%r8d,%r13d
	leal	(%r11,%rdx,1),%r11d
	andl	%r15d,%esi
	vpand	%xmm12,%xmm11,%xmm8
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	192-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r9d,%esi
	xorl	%r13d,%r14d
	leal	(%rdx,%rsi,1),%edx
	movl	%eax,%r12d
	addl	36+16(%rbp),%ecx
	andl	%r11d,%r12d
	rorxl	$25,%r11d,%r13d
	rorxl	$11,%r11d,%esi
	leal	(%rdx,%r14,1),%edx
	leal	(%rcx,%r12,1),%ecx
	andnl	%ebx,%r11d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r11d,%r14d
	leal	(%rcx,%r12,1),%ecx
	xorl	%r14d,%r13d
	movl	%edx,%esi
	rorxl	$22,%edx,%r12d
	leal	(%rcx,%r13,1),%ecx
	xorl	%r8d,%esi
	rorxl	$13,%edx,%r14d
	rorxl	$2,%edx,%r13d
	leal	(%r10,%rcx,1),%r10d
	andl	%esi,%r15d
	vaesenclast	%xmm10,%xmm9,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	208-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%r8d,%r15d
	xorl	%r13d,%r14d
	leal	(%rcx,%r15,1),%ecx
	movl	%r11d,%r12d
	addl	40+16(%rbp),%ebx
	andl	%r10d,%r12d
	rorxl	$25,%r10d,%r13d
	rorxl	$11,%r10d,%r15d
	leal	(%rcx,%r14,1),%ecx
	leal	(%rbx,%r12,1),%ebx
	andnl	%eax,%r10d,%r12d
	xorl	%r15d,%r13d
	rorxl	$6,%r10d,%r14d
	leal	(%rbx,%r12,1),%ebx
	xorl	%r14d,%r13d
	movl	%ecx,%r15d
	rorxl	$22,%ecx,%r12d
	leal	(%rbx,%r13,1),%ebx
	xorl	%edx,%r15d
	rorxl	$13,%ecx,%r14d
	rorxl	$2,%ecx,%r13d
	leal	(%r9,%rbx,1),%r9d
	andl	%r15d,%esi
	vpand	%xmm13,%xmm11,%xmm11
	vaesenc	%xmm10,%xmm9,%xmm9
	vmovdqu	224-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%edx,%esi
	xorl	%r13d,%r14d
	leal	(%rbx,%rsi,1),%ebx
	movl	%r10d,%r12d
	addl	44+16(%rbp),%eax
	andl	%r9d,%r12d
	rorxl	$25,%r9d,%r13d
	rorxl	$11,%r9d,%esi
	leal	(%rbx,%r14,1),%ebx
	leal	(%rax,%r12,1),%eax
	andnl	%r11d,%r9d,%r12d
	xorl	%esi,%r13d
	rorxl	$6,%r9d,%r14d
	leal	(%rax,%r12,1),%eax
	xorl	%r14d,%r13d
	movl	%ebx,%esi
	rorxl	$22,%ebx,%r12d
	leal	(%rax,%r13,1),%eax
	xorl	%ecx,%esi
	rorxl	$13,%ebx,%r14d
	rorxl	$2,%ebx,%r13d
	leal	(%r8,%rax,1),%r8d
	andl	%esi,%r15d
	vpor	%xmm11,%xmm8,%xmm8
	vaesenclast	%xmm10,%xmm9,%xmm11
	vmovdqu	0-128(%rdi),%xmm10
	xorl	%r12d,%r14d
	xorl	%ecx,%r15d
	xorl	%r13d,%r14d
	leal	(%rax,%r15,1),%eax
	movl	%r9d,%r12d
	vmovq	%xmm15,%r13
	vpextrq	$1,%xmm15,%r15
	vpand	%xmm14,%xmm11,%xmm11
	vpor	%xmm11,%xmm8,%xmm8
	leaq	-64(%rbp),%rbp
	vmovdqu	%xmm8,(%r15,%r13,1)
	leaq	16(%r13),%r13
	cmpq	%rsp,%rbp
	jae	L$ower_avx2

	movq	552(%rsp),%r15
	leaq	64(%r13),%r13
	movq	560(%rsp),%rsi
	addl	%r14d,%eax
	leaq	448(%rsp),%rsp

	addl	0(%r15),%eax
	addl	4(%r15),%ebx
	addl	8(%r15),%ecx
	addl	12(%r15),%edx
	addl	16(%r15),%r8d
	addl	20(%r15),%r9d
	addl	24(%r15),%r10d
	leaq	(%rsi,%r13,1),%r12
	addl	28(%r15),%r11d

	cmpq	64+16(%rsp),%r13

	movl	%eax,0(%r15)
	cmoveq	%rsp,%r12
	movl	%ebx,4(%r15)
	movl	%ecx,8(%r15)
	movl	%edx,12(%r15)
	movl	%r8d,16(%r15)
	movl	%r9d,20(%r15)
	movl	%r10d,24(%r15)
	movl	%r11d,28(%r15)

	jbe	L$oop_avx2
	leaq	(%rsp),%rbp

L$done_avx2:
	leaq	(%rbp),%rsp
	movq	64+32(%rsp),%r8
	movq	64+56(%rsp),%rsi
	vmovdqu	%xmm8,(%r8)
	vzeroall
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
L$epilogue_avx2:
	.byte	0xf3,0xc3


.p2align	5
# aesni_cbc_sha256_enc_shaext: AES-CBC encryption interleaved with SHA-256
# compression, using AES-NI plus the SHA extension instructions.
# Reached by a jump from _aesni_cbc_sha256_enc, so the stack is still the
# caller's and 8(%rsp) is the first stack-passed argument.
#
# Registers as used below:
#   %rdi      plaintext pointer, advanced by 64 bytes per iteration
#   %rsi      ciphertext pointer; converted to (out - in) so that
#             (%rsi,%rdi) addresses the output
#   %rdx      iteration count (64 bytes each); must be non-zero because
#             the loop decrements before it tests
#   %rcx      AES key schedule, round-count word at 240(%rcx)
#   %r8       16-byte IV / chaining value: read on entry, updated on exit
#   %r9       32-byte SHA-256 state: read on entry, updated on exit
#   8(%rsp)   pointer to the data being hashed, 64 bytes per iteration
# Scratch: %rax, %r10, %r11, %xmm0-%xmm15, flags.
#
# SSSE3 and SHA instructions are hand-encoded with .byte; each such line
# carries its decoded AT&T form as a trailing comment.
aesni_cbc_sha256_enc_shaext:
	movq	8(%rsp),%r10		# r10 = data to hash (7th argument)
	leaq	K256+128(%rip),%rax	# biased table pointer: N-128(%rax) == K256+N
	movdqu	(%r9),%xmm1		# first 16 bytes of the hash state
	movdqu	16(%r9),%xmm2		# second 16 bytes of the hash state
	movdqa	512-128(%rax),%xmm3	# pshufb control stored right after the round
					# constants (presumably the byte-swap mask)

	movl	240(%rcx),%r11d		# round-count word, tested against 11 below
	subq	%rdi,%rsi		# rsi = out - in
	movups	(%rcx),%xmm15		# round key 0
	movups	(%r8),%xmm6		# load IV: xmm6 is the CBC chaining value and is
					# read by the first xorps in the loop
	movups	16(%rcx),%xmm4		# round key 1, preloaded for the first aesenc
	leaq	112(%rcx),%rcx		# bias key pointer: round key k is at 16*k-112(%rcx)

	# Rearrange the two state halves into the operand layout expected by
	# sha256rnds2; the exit path below applies the inverse shuffle.
	pshufd	$0x1b,%xmm1,%xmm0
	pshufd	$0xb1,%xmm1,%xmm1
	pshufd	$0x1b,%xmm2,%xmm2
	movdqa	%xmm3,%xmm7		# keep the shuffle mask; xmm3 is reused as scratch
.byte	102,15,58,15,202,8		# palignr $8,%xmm2,%xmm1
	punpcklqdq	%xmm0,%xmm2

	jmp	L$oop_shaext

.p2align	4
# One iteration: hash 64 bytes at (%r10) (64 SHA-256 rounds, two per
# sha256rnds2) and CBC-encrypt the four 16-byte blocks at (%rdi).
L$oop_shaext:
	movdqu	(%r10),%xmm10
	movdqu	16(%r10),%xmm11
	movdqu	32(%r10),%xmm12
.byte	102,68,15,56,0,211		# pshufb %xmm3,%xmm10
	movdqu	48(%r10),%xmm13

	movdqa	0-128(%rax),%xmm0	# K256 row 0 (rounds 0-3)
	paddd	%xmm10,%xmm0
.byte	102,68,15,56,0,219		# pshufb %xmm3,%xmm11
	movdqa	%xmm2,%xmm9		# save incoming state for the final add
	movdqa	%xmm1,%xmm8
	movups	0(%rdi),%xmm14		# plaintext block 0
	xorps	%xmm15,%xmm14		# fold in round key 0
	xorps	%xmm14,%xmm6		# chaining value ^ plaintext ^ rk0
	movups	-80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0	# move upper two K+W words down for the next pair
	movups	-64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movdqa	32-128(%rax),%xmm0	# rounds 4-7
	paddd	%xmm11,%xmm0
.byte	102,68,15,56,0,227		# pshufb %xmm3,%xmm12
	leaq	64(%r10),%r10
	movups	-48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movups	-32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movdqa	64-128(%rax),%xmm0	# rounds 8-11
	paddd	%xmm12,%xmm0
.byte	102,68,15,56,0,235		# pshufb %xmm3,%xmm13
.byte	69,15,56,204,211		# sha256msg1 %xmm11,%xmm10
	movups	-16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm13,%xmm3
.byte	102,65,15,58,15,220,4		# palignr $4,%xmm12,%xmm3
	paddd	%xmm3,%xmm10
	movups	0(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movdqa	96-128(%rax),%xmm0	# rounds 12-15
	paddd	%xmm13,%xmm0
.byte	69,15,56,205,213		# sha256msg2 %xmm13,%xmm10
.byte	69,15,56,204,220		# sha256msg1 %xmm12,%xmm11
	movups	16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movups	32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movdqa	%xmm10,%xmm3
.byte	102,65,15,58,15,221,4		# palignr $4,%xmm13,%xmm3
	paddd	%xmm3,%xmm11
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	128-128(%rax),%xmm0	# rounds 16-19
	paddd	%xmm10,%xmm0
.byte	69,15,56,205,218		# sha256msg2 %xmm10,%xmm11
.byte	69,15,56,204,229		# sha256msg1 %xmm13,%xmm12
	movups	48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm11,%xmm3
.byte	102,65,15,58,15,218,4		# palignr $4,%xmm10,%xmm3
	paddd	%xmm3,%xmm12
	# Key-length dispatch. The flags from this cmpl are consumed by both
	# the jb and the later je; movups and aesenc leave them untouched.
	#   below 11 : xmm5 (from 48(%rcx)) is already the last round key
	#   equal 11 : two more rounds, last key from 80(%rcx)
	#   above 11 : four more rounds, last key from 112(%rcx)
	cmpl	$11,%r11d
	jb	L$aesenclast1
	movups	64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	je	L$aesenclast1
	movups	96(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	112(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
L$aesenclast1:
	aesenclast	%xmm5,%xmm6	# xmm6 = ciphertext block 0
	movups	16-112(%rcx),%xmm4	# reload round key 1 for the next block
	nop
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movups	16(%rdi),%xmm14		# plaintext block 1
	xorps	%xmm15,%xmm14
	movups	%xmm6,0(%rsi,%rdi,1)	# store ciphertext block 0
	xorps	%xmm14,%xmm6		# chain: ciphertext 0 ^ plaintext 1 ^ rk0
	movups	-80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	movdqa	160-128(%rax),%xmm0	# rounds 20-23
	paddd	%xmm11,%xmm0
.byte	69,15,56,205,227		# sha256msg2 %xmm11,%xmm12
.byte	69,15,56,204,234		# sha256msg1 %xmm10,%xmm13
	movups	-64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm12,%xmm3
.byte	102,65,15,58,15,219,4		# palignr $4,%xmm11,%xmm3
	paddd	%xmm3,%xmm13
	movups	-48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	192-128(%rax),%xmm0	# rounds 24-27
	paddd	%xmm12,%xmm0
.byte	69,15,56,205,236		# sha256msg2 %xmm12,%xmm13
.byte	69,15,56,204,211		# sha256msg1 %xmm11,%xmm10
	movups	-32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm13,%xmm3
.byte	102,65,15,58,15,220,4		# palignr $4,%xmm12,%xmm3
	paddd	%xmm3,%xmm10
	movups	-16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	224-128(%rax),%xmm0	# rounds 28-31
	paddd	%xmm13,%xmm0
.byte	69,15,56,205,213		# sha256msg2 %xmm13,%xmm10
.byte	69,15,56,204,220		# sha256msg1 %xmm12,%xmm11
	movups	0(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm10,%xmm3
.byte	102,65,15,58,15,221,4		# palignr $4,%xmm13,%xmm3
	paddd	%xmm3,%xmm11
	movups	16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	256-128(%rax),%xmm0	# rounds 32-35
	paddd	%xmm10,%xmm0
.byte	69,15,56,205,218		# sha256msg2 %xmm10,%xmm11
.byte	69,15,56,204,229		# sha256msg1 %xmm13,%xmm12
	movups	32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm11,%xmm3
.byte	102,65,15,58,15,218,4		# palignr $4,%xmm10,%xmm3
	paddd	%xmm3,%xmm12
	movups	48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	cmpl	$11,%r11d		# same key-length dispatch as block 0
	jb	L$aesenclast2
	movups	64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	je	L$aesenclast2
	movups	96(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	112(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
L$aesenclast2:
	aesenclast	%xmm5,%xmm6	# xmm6 = ciphertext block 1
	movups	16-112(%rcx),%xmm4	# reload round key 1
	nop
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movups	32(%rdi),%xmm14		# plaintext block 2
	xorps	%xmm15,%xmm14
	movups	%xmm6,16(%rsi,%rdi,1)	# store ciphertext block 1
	xorps	%xmm14,%xmm6
	movups	-80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	movdqa	288-128(%rax),%xmm0	# rounds 36-39
	paddd	%xmm11,%xmm0
.byte	69,15,56,205,227		# sha256msg2 %xmm11,%xmm12
.byte	69,15,56,204,234		# sha256msg1 %xmm10,%xmm13
	movups	-64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm12,%xmm3
.byte	102,65,15,58,15,219,4		# palignr $4,%xmm11,%xmm3
	paddd	%xmm3,%xmm13
	movups	-48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	320-128(%rax),%xmm0	# rounds 40-43
	paddd	%xmm12,%xmm0
.byte	69,15,56,205,236		# sha256msg2 %xmm12,%xmm13
.byte	69,15,56,204,211		# sha256msg1 %xmm11,%xmm10
	movups	-32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm13,%xmm3
.byte	102,65,15,58,15,220,4		# palignr $4,%xmm12,%xmm3
	paddd	%xmm3,%xmm10
	movups	-16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	352-128(%rax),%xmm0	# rounds 44-47
	paddd	%xmm13,%xmm0
.byte	69,15,56,205,213		# sha256msg2 %xmm13,%xmm10
.byte	69,15,56,204,220		# sha256msg1 %xmm12,%xmm11
	movups	0(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm10,%xmm3
.byte	102,65,15,58,15,221,4		# palignr $4,%xmm13,%xmm3
	paddd	%xmm3,%xmm11
	movups	16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	384-128(%rax),%xmm0	# rounds 48-51
	paddd	%xmm10,%xmm0
.byte	69,15,56,205,218		# sha256msg2 %xmm10,%xmm11
.byte	69,15,56,204,229		# sha256msg1 %xmm13,%xmm12
	movups	32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm11,%xmm3
.byte	102,65,15,58,15,218,4		# palignr $4,%xmm10,%xmm3
	paddd	%xmm3,%xmm12
	movups	48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1
	movdqa	416-128(%rax),%xmm0	# rounds 52-55
	paddd	%xmm11,%xmm0
.byte	69,15,56,205,227		# sha256msg2 %xmm11,%xmm12
.byte	69,15,56,204,234		# sha256msg1 %xmm10,%xmm13
	cmpl	$11,%r11d		# same key-length dispatch as block 0
	jb	L$aesenclast3
	movups	64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	je	L$aesenclast3
	movups	96(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	112(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
L$aesenclast3:
	aesenclast	%xmm5,%xmm6	# xmm6 = ciphertext block 2
	movups	16-112(%rcx),%xmm4	# reload round key 1
	nop
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movdqa	%xmm12,%xmm3
.byte	102,65,15,58,15,219,4		# palignr $4,%xmm11,%xmm3
	paddd	%xmm3,%xmm13
	movups	48(%rdi),%xmm14		# plaintext block 3
	xorps	%xmm15,%xmm14
	movups	%xmm6,32(%rsi,%rdi,1)	# store ciphertext block 2
	xorps	%xmm14,%xmm6
	movups	-80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	movups	-64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movdqa	448-128(%rax),%xmm0	# rounds 56-59
	paddd	%xmm12,%xmm0
.byte	69,15,56,205,236		# sha256msg2 %xmm12,%xmm13
	movdqa	%xmm7,%xmm3		# restore the shuffle mask for the next iteration
	movups	-48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movups	-32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movdqa	480-128(%rax),%xmm0	# rounds 60-63
	paddd	%xmm13,%xmm0
	movups	-16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	movups	0(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
.byte	15,56,203,209			# sha256rnds2 %xmm0,%xmm1,%xmm2
	pshufd	$0x0e,%xmm0,%xmm0
	movups	16(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
.byte	15,56,203,202			# sha256rnds2 %xmm0,%xmm2,%xmm1

	movups	32(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	48(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	cmpl	$11,%r11d		# same key-length dispatch as block 0
	jb	L$aesenclast4
	movups	64(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	80(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
	je	L$aesenclast4
	movups	96(%rcx),%xmm4
	aesenc	%xmm5,%xmm6
	movups	112(%rcx),%xmm5
	aesenc	%xmm4,%xmm6
L$aesenclast4:
	aesenclast	%xmm5,%xmm6	# xmm6 = ciphertext block 3
	movups	16-112(%rcx),%xmm4	# round key 1 for the next iteration
	nop

	paddd	%xmm9,%xmm2		# add the state saved at the top of the iteration
	paddd	%xmm8,%xmm1

	decq	%rdx			# sets ZF for the jnz below;
	movups	%xmm6,48(%rsi,%rdi,1)	# movups and leaq do not modify flags
	leaq	64(%rdi),%rdi
	jnz	L$oop_shaext

	# Undo the entry shuffle so the state is written back in memory order.
	pshufd	$0xb1,%xmm2,%xmm2
	pshufd	$0x1b,%xmm1,%xmm3
	pshufd	$0xb1,%xmm1,%xmm1
	punpckhqdq	%xmm2,%xmm1
.byte	102,15,58,15,211,8		# palignr $8,%xmm3,%xmm2

	movups	%xmm6,(%r8)		# last ciphertext block becomes the new IV
	movdqu	%xmm1,(%r9)
	movdqu	%xmm2,16(%r9)
	.byte	0xf3,0xc3		# rep ret
